# Principal component analysis of the penguin size measurements.
# Rows with a missing value in body mass or any "_mm" column are removed,
# only those measurement columns are kept, and scale() centers and scales
# each column before prcomp() is run.
penguin_pca <- penguins %>%
  drop_na(body_mass_g, ends_with("_mm")) %>%
  select(body_mass_g, ends_with("_mm")) %>%
  scale() %>%
  prcomp()

# The loadings of each variable on each PC are stored in `rotation`.
penguin_pca[["rotation"]]
## PC1 PC2 PC3 PC4
## body_mass_g 0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm 0.4552503 0.597031143 0.6443012 -0.1455231
## bill_depth_mm -0.4003347 0.797766572 -0.4184272 0.1679860
## flipper_length_mm 0.5760133 0.002282201 -0.2320840 0.7837987
# Full penguins data restricted to rows with no missing value in body mass
# or any "_mm" column (all other columns are kept).
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))
# PCA biplot: points colored by species, with labeled loading arrows.
# autoplot() output accepts further ggplot2 layers/themes via `+`; arrow
# size and other details can also be changed.
autoplot(
  penguin_pca,
  data = penguin_complete,
  loadings = TRUE,
  loadings.label = TRUE,
  colour = "species"
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# There are other ways to make PCA biplots in R as well.
# Read the landings workbook and tidy it:
#   * clean_names() converts column names to snake case (its default);
#   * every character column is lower-cased with `tolower`;
#   * str_sub(end = -4) keeps each nmfs_name up to its 4th-from-last
#     character, i.e. it drops the last 3 characters;
#   * only rows whose confidentiality is "public" are kept.
fish_noaa <- read_excel(here("data", "foss_landings.xlsx")) %>%
  clean_names() %>%
  mutate(
    across(where(is.character), tolower),
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  filter(confidentiality == "public")
# usually someone has already written a function you want to use.
# Make a customized graph ----
# Line chart of pounds by year, one line per nmfs_name.
# The legend is hidden on the line layer.
fish_plot <- ggplot(fish_noaa, aes(year, pounds)) +
  geom_line(
    mapping = aes(color = nmfs_name),
    show.legend = FALSE
  ) +
  theme_minimal()

# Display the static plot.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).
# Convert the static ggplot into an interactive plotly graph.
ggplotly(p = fish_plot)